##IJIC AFINT Replication
#Author: Ibrahim Kocaman, kocamani@erau.edu
#Related Publication:Kocaman, I. (2025). Exploring state development in Africa through intelligence: Introducing the African State Intelligence Agencies Dataset. International Journal of Intelligence and CounterIntelligence. https://doi.org/10.1080/08850607.2025.2520794

##Set WD--adjust as needed 
setwd("~/Desktop/DSS")
#load necessary libraries--install them first if necessary
library(tidyverse)
library(dplyr)
library(tidyr)
library(stargazer)
library(skimr)
library(stringr)

# Load the agency-level AFINT data.
afint_agency <- read.csv("AFINT_agency.csv")

## TABLE 1. Descriptive statistics for agency data
## (Source: AFINT data, Unit of Analysis: agency)
# Summarises `afint_agency`, the data frame read in just above.
# Written to its own file so the Table 2 export below cannot overwrite it.
stargazer(afint_agency, type = "text",
          title = "Descriptive Statistics",
          digits = 2,  # Number of decimal places
          out = "descriptive_statistics_agency.html")

# REGRESSION MODELS
# Load the agency-year regression data.
afint_agency_year <- read.csv("AFINT_Replication_agency-year.csv")

# TABLE 2. Descriptive statistics for regression data
# (Source: AFINT data, Unit of Analysis: agency-year)
stargazer(afint_agency_year, type = "text",
          title = "Descriptive Statistics",
          digits = 2,
          out = "descriptive_statistics.html")

# Run Poisson regression
# Model 1: Poisson GLM of `age` on state capacity (`sc_est`) plus the
# controls e_gdp, e_pop and v2x_polyarchy, fitted on the agency-year data.
model1 <- glm(age ~ sc_est + e_gdp + e_pop + v2x_polyarchy, data = afint_agency_year, family = "poisson")
# Summary of the model
summary(model1)

# Model 2: same right-hand side as Model 1, with `agency_count` as outcome.
model2 <- glm(agency_count ~ sc_est + e_gdp + e_pop + v2x_polyarchy, data = afint_agency_year, family = "poisson")
# Summary of the model
summary(model2)

# Run OLS regression
# Model 3: linear model with `sc_est` as the outcome and the civil-war
# involvement dummy as the predictor of interest, same three controls.
model3 <- lm(sc_est ~ dummy_civil_war + e_gdp + e_pop + v2x_polyarchy, data = afint_agency_year)
# Summary of the model
summary(model3)

# Model 4: as Model 3, but with the repression involvement dummy.
model4 <- lm(sc_est ~ dummy_repression + e_gdp + e_pop + v2x_polyarchy, data = afint_agency_year)
# Summary of the model
summary(model4)

# TABLE 3. Export regression results for model1-model4 as HTML.
# The four models have three distinct outcomes: `age` (Model 1),
# `agency_count` (Model 2) and `sc_est` (Models 3 and 4, which share a
# header), so three dependent-variable labels are supplied.
# covariate.labels is left out on purpose: the models contain six different
# predictors, and showing the variable names guarantees that every
# coefficient row is identified correctly.
stargazer(model1, model2, model3, model4, type = "html",
          title = "Regression Results",
          dep.var.labels = c("Agency age", "Agency count",
                             "State capacity estimate"),
          column.labels = c("Model 1", "Model 2", "Model 3", "Model 4"),
          out = "regression_results.html")  # Export as HTML

#-------------------------------------------------------------------------------
## DATA MANAGEMENT and MERGING--not necessary for replication
# State capacity index (Hanson and Sigman, 2021)
statcap <- read.csv("state-capacity-index.csv")
# Percentage of territory controlled by the government (V-Dem)
perstatcont <- read.csv("percentage-of-territory-controlled-by-government.csv")

# Combine both sources on Entity/Year and call the country column `Country`
merged_sc <- rename(
  merge(statcap, perstatcont, by = c("Entity", "Year")),
  Country = Entity
)

# COW country codes: harmonise the column names and drop the row coded 484
cow <- read.csv("COW-country-codes.csv") %>%
  rename(Country = StateNme, cow = CCode) %>%
  filter(cow != "484")

# The row coded 490 is relabelled so its name is "Congo"
cow[cow$cow == "490", "Country"] <- "Congo"

# Full (outer) merge with the COW codes on Country, then drop the two Code
# columns, lower-case the year column and sort by cow-year
merged_sccow <- merge(merged_sc, cow, by = "Country", all = TRUE) %>%
  select(-Code.x, -Code.y) %>%
  rename(year = Year) %>%
  arrange(cow, year)

# Inner join with cow_year_data on cow/year, then give the territory-control
# and state-capacity columns short names
# NOTE(review): cow_year_data is not created anywhere in this script; it must
# already exist in the session.
merged_cy_data <- merge(cow_year_data, merged_sccow, by = c("cow", "year")) %>%
  rename(
    terr_uc = Territory.under.state.control..best.estimate..aggregate..average.,
    sc_est = State.capacity.estimate
  )

## Order ais_expanded by cow-year
# NOTE(review): ais_expanded is not created in this script; it must already
# exist in the session.
ais_expanded <- arrange(ais_expanded, cow, year)

# Inner join with the country-year data on cow/year, then drop leftover columns
ais_cya <- merge(ais_expanded, merged_cy_data, by = c("cow", "year")) %>%
  select(-Country.x, -X, -X.1, -X.2)

summary(ais_cya$agency_count)

# Agency age: years elapsed since founding, defined only for years between
# `found` and `dissolve` (inclusive); NA otherwise
ais_cya <- mutate(
  ais_cya,
  age = ifelse(year >= found & year <= dissolve, year - found, NA)
)

summary(ais_cya$age)

ais_cya <- arrange(ais_cya, cow, year, Agency)


table(ais_cya$orientation_legacy)

# Orientation/legacy dummies: each is 1 when its keyword occurs anywhere in
# `orientation_legacy` and 0 otherwise. grepl() returns FALSE for missing
# values, so a row with NA orientation gets 0 on every dummy.
# dummy_french uses the substring match only: "French, coup-borne" already
# contains "French", and an additional `==` comparison would turn NA
# orientations into NA for this one dummy while the others stay 0.
ais <- ais %>%
  mutate(
    dummy_british = ifelse(grepl("British", orientation_legacy), 1, 0),
    dummy_french = ifelse(grepl("French", orientation_legacy), 1, 0),
    dummy_coup_borne = ifelse(grepl("coup-borne", orientation_legacy), 1, 0),
    dummy_cold_war_east = ifelse(grepl("cold war east", orientation_legacy), 1, 0),
    dummy_american = ifelse(grepl("American", orientation_legacy), 1, 0),
    dummy_russian = ifelse(grepl("Russian", orientation_legacy), 1, 0),
    dummy_indian = ifelse(grepl("Indian", orientation_legacy), 1, 0)
  )

# African independence dates: keep only the COW code and independence date
cow_indep <- read.csv("afr_cow_indep.csv") %>%
  select(cow, independence)
# Check the resulting data frame
print(head(cow_indep))

# Attach the independence date to the agency-level data
ais <- merge(ais, cow_indep, by = "cow")

table(ais$found)

# Attach the independence date to the agency-country-year data
data_H2 <- merge(ais_cya, cow_indep, by = "cow")

# Years between independence and founding, or between 1960 and founding,
# whichever is smaller
data_H2 <- mutate(
  data_H2,
  years_since_independence = pmin(found - independence, found - 1960)
)

summary(data_H2$independence)
summary(data_H2$years_since_independence)

## Involvement dummies
data_H34 <- data_H2
table(data_H34$involvement)

# Each dummy is 1 when `involvement` matches any keyword in its pattern
# (alternatives separated by "|"), 0 when it matches none
data_H34 <- mutate(
  data_H34,
  dummy_repression = ifelse(
    str_detect(involvement, "repression|forced disappearances|summary executions|xtrajudicial killings|torture|abuse"),
    1, 0
  ),
  dummy_counterterrorism = ifelse(
    str_detect(involvement, "counterterrorism|counteterrrorism|counter-terrorism|counter-espionage"),
    1, 0
  ),
  dummy_civil_war = ifelse(
    str_detect(involvement, "civil war|civil conflict|civil unrest|political instability"),
    1, 0
  ),
  dummy_foreign_policy = ifelse(
    str_detect(involvement, "foreign policy|international affairs|diplomacy|state security|national security|immigration control"),
    1, 0
  )
)

# Inspect the first rows before trimming columns
print(head(data_H34))

# Remove two unused columns and restore the plain `Country` name
data_H34 <- data_H34 %>%
  select(-peak_number, -direction_change) %>%
  rename(Country = Country.y)

summary(data_H34$dummy_foreign_policy)
summary(data_H34$dummy_repression)
summary(data_H34$dummy_civil_war)
summary(data_H34$dummy_counterterrorism)

### Load V-Dem data
vdem <- read_csv("V-Dem-CY-Full+Others-v15.csv")
library(dplyr)

# Keep the year, the COW code (renamed to `cow`) and the control variables
vdem_controls <- select(
  vdem,
  year, cow = COWcode, e_gdp, e_gdppc, e_pop, v2x_polyarchy
)

# Regression data: inner join of the agency-year data with the controls
data_H45 <- merge(data_H34, vdem_controls, by = c("cow", "year"))

# Export the replication data, first as CSV ...
write.csv(data_H45, "AFINT_Replication_agency-year.csv", row.names = FALSE)

# ... and then as an RData file
save(data_H45, file = "AFINT_Replication_agency-year.RData")


# Summarize the entire dataset
skim(ais_cya)

# Drop unnecessary columns
ais_cya <- ais_cya %>% select(-peak_number)
# Print descriptive statistics for ais_cya to the console as text
stargazer(ais_cya, type = "text", title = "Descriptive Statistics")

# Create a regression table
# NOTE(review): `model`, `poisson_model` and `poisson_model2` are not created
# anywhere in this script (only model1-model4 are); they must already exist in
# the session for this call to run.
# NOTE(review): the covariate and dependent-variable labels are generic
# placeholders; confirm them against the models actually passed in.
stargazer(model, model2, poisson_model, poisson_model2, type = "text",
          title = "Regression Results",
          covariate.labels = c("Independent Variable 1", "Independent Variable 2"),
          dep.var.labels = "Age",
          out = "table1.txt")  # Save to a text file or change type to "html" or "latex"

# Install and load modelsummary if not already installed
if (!require(modelsummary)) install.packages("modelsummary")
library(modelsummary)
library(pandoc)

# Produce a summary table
# NOTE(review): same externally defined model objects as the stargazer call above.
modelsummary(list(model, model2, poisson_model, poisson_model2), 
             stars = TRUE,  # Add stars for significance levels
             statistic = "({std.error})",  # Include standard errors in parentheses
             output = "table2.docx")  # Writes to "table2.docx"; change the file name for another format

#------------------------------------------------------------------------------
##Mapping and Other Descriptive Visualizations--dataframe names and file paths may need to be adjusted 
# Install and load necessary packages
if (!require(ggplot2)) install.packages("ggplot2")
if (!require(sf)) install.packages("sf")
if (!require(dplyr)) install.packages("dplyr")

library(ggplot2)
library(sf)
library(dplyr)


# Install and load rnaturalearth package if not already installed
if (!require(rnaturalearth)) install.packages("rnaturalearth")
if (!require(rnaturalearthdata)) install.packages("rnaturalearthdata")

library(rnaturalearth)
library(rnaturalearthdata)

# Load the shapefile for African countries
africa_map <- ne_countries(continent = "Africa", returnclass = "sf")

# countrycode() is needed for the conversion below, so the package is
# installed (if missing) and attached before its first use.
if (!require(countrycode)) install.packages("countrycode")
library(countrycode)

# Convert ISO3 codes in "iso_a3" to numeric COW codes in a new column "cow"
africa_map <- africa_map %>%
  mutate(cow = countrycode(iso_a3, origin = "iso3c", destination = "cown"))
table(africa_map$cow)

# Show which country carries COW code 420. which() skips polygons whose code
# could not be converted (NA), which a bare logical subset would print as NA.
print(africa_map$sovereignt[which(africa_map$cow == 420)])

# Calculate the age variable for merged_cy_data
# NOTE(review): this needs `found` and `dissolve` columns in merged_cy_data;
# confirm they are present in that country-year table.
merged_cy_data <- merged_cy_data %>%
  mutate(age = ifelse(year >= found & year <= dissolve, year - found, NA))

# get age and count for mapping
library(dplyr)

# Maximum that yields NA when a group has no observed values at all.
# max(x, na.rm = TRUE) would return -Inf (with a warning) in that case.
# The result is always returned as a double.
max_or_na <- function(x) {
  if (all(is.na(x))) {
    return(NA_real_)
  }
  as.numeric(max(x, na.rm = TRUE))
}

# Keep only the country code, agency count and agency age
age_count <- data_H45 %>%
  select(cow, agency_count, age)

# Calculate oldest agency age and max count, repeated on every row of a country
age_count <- age_count %>%
  group_by(cow) %>%
  mutate(max_agency_count = max_or_na(agency_count),
         max_age = max_or_na(age)) %>%
  ungroup()

summary(age_count$max_agency_count)
summary(age_count$max_age)

# Create a data frame with one observation per cow
age_count_summary <- age_count %>%
  group_by(cow) %>%
  summarise(
    max_agency_count = max_or_na(agency_count),
    max_age = max_or_na(age)
  )

# Attach the per-country maxima to the map polygons, matched on COW code
merged_4map_ais <- left_join(africa_map, age_count_summary, by = "cow")

summary(merged_4map_ais$max_age)
summary(merged_4map_ais$max_agency_count)

# Choropleth: age of the oldest agency in each country
ggplot(merged_4map_ais, aes(fill = max_age)) +
  geom_sf() +
  scale_fill_gradient(
    name = "Agency Age",
    low = "lightblue", high = "darkblue", na.value = "grey80"
  ) +
  labs(
    title = "Intelligence history by Country in Africa",
    subtitle = "Age of oldest agency in each African country",
    caption = "Data Source: AFINT Dataset"
  ) +
  theme_minimal() +
  theme(axis.text = element_text(size = 6), legend.position = "bottom") +
  coord_sf(xlim = c(-20, 55), ylim = c(-35, 40), expand = FALSE)

# Choropleth: maximum agency count in each country, same bounding box
ggplot(merged_4map_ais, aes(fill = max_agency_count)) +
  geom_sf() +
  scale_fill_gradient(
    name = "Agency Count",
    low = "lightblue", high = "darkblue", na.value = "grey80"
  ) +
  labs(
    title = "Agency Counts by Country in Africa",
    subtitle = "Number of agencies in each African country",
    caption = "Data Source: AFINT Dataset"
  ) +
  theme_minimal() +
  theme(axis.text = element_text(size = 6), legend.position = "bottom") +
  coord_sf(xlim = c(-20, 55), ylim = c(-35, 40), expand = FALSE)

library(ggplot2)
library(reshape2)

# Join the agency data onto the map polygons by COW code
map_data <- left_join(africa_map, ais, by = "cow")

# NOTE(review): `orientation_category` is not created in this script; it is
# expected to be a column of `ais` already.
table(map_data$sovereignt, map_data$orientation_category)

# Map of orientation categories with centred title and subtitle
ggplot(map_data, aes(fill = orientation_category)) +
  geom_sf(color = "white") +
  scale_fill_manual(
    name = "Orientation Legacy",
    na.value = "lightgrey",
    values = c(
      British = "blue", French = "red",
      American = "green", Russian = "darkred",
      Indian = "purple", "Cold War East" = "orange",
      "Coup-borne" = "brown", Other = "yellow"
    )
  ) +
  labs(
    title = "Orientation/legacy of Intelligence Agencies in Africa",
    subtitle = "Categorized by Dominant Orientation",
    caption = "Data Source: AFINT Dataset"
  ) +
  theme_minimal() +
  theme(
    plot.title = element_text(size = 12, hjust = 0.5),     # centred title
    plot.subtitle = element_text(size = 10, hjust = 0.5),  # centred subtitle
    plot.caption = element_text(size = 8),
    axis.text = element_text(size = 6),
    legend.title = element_text(size = 7),
    legend.text = element_text(size = 6),
    legend.position = "bottom"
  )


table(ais$purpose)
# Frequency table of the raw "purpose" values as a two-column data frame
purpose_summary <- setNames(
  as.data.frame(table(ais$purpose)),
  c("Purpose", "Count")
)
print(purpose_summary)

# Bar chart of the raw purpose values, most frequent first
ggplot(purpose_summary, aes(x = reorder(Purpose, -Count), y = Count)) +
  geom_col(fill = "skyblue") +
  labs(
    title = "Breakdown of Agency Purposes",
    x = "Purpose",
    y = "Count"
  ) +
  theme_minimal() +
  # Slanted x-axis labels so long purpose strings stay legible
  theme(axis.text.x = element_text(angle = 45, hjust = 1, size = 8))


# Step 1: Separate multiple purposes within each entry
# Create a long-format table with each purpose as a separate row
clean_purpose <- ais %>%
  mutate(purpose = strsplit(as.character(purpose), ",\\s*")) %>%  # Split by comma and optional whitespace
  unnest(purpose)  # Expand each purpose into its own row

# Step 2: Count the occurrences of each unique purpose
purpose_counts <- clean_purpose %>%
  count(purpose, sort = TRUE)  # Count each purpose and sort by frequency

# Print the cleaner table of purposes
print(purpose_counts)

# Distribution of the per-purpose frequencies. hist() requires a numeric
# vector, so the count column `n` is passed rather than the whole table.
hist(purpose_counts$n,
     main = "Distribution of purpose frequencies",
     xlab = "Count")

# gt: install on first use, then attach
if (!require(gt)) install.packages("gt")
library(gt)

# Purpose counts rendered as a gt table
purpose_counts %>%
  gt() %>%
  tab_header(
    title = "Breakdown of Agency Purposes",
    subtitle = "Each purpose counted individually across agencies"
  ) %>%
  cols_label(purpose = "Purpose", n = "Count") %>%
  fmt_number(columns = n, decimals = 0) %>%
  tab_options(table.font.size = 12)
# kableExtra: install on first use, then attach
if (!require(kableExtra)) install.packages("kableExtra")
library(kableExtra)

# The same counts as a styled HTML table: bold count column, dark header row
purpose_counts %>%
  kable(
    format = "html",
    col.names = c("Purpose", "Count"),
    caption = "Breakdown of Agency Purposes"
  ) %>%
  kable_styling(
    bootstrap_options = c("striped", "hover", "condensed", "responsive")
  ) %>%
  column_spec(2, bold = TRUE) %>%
  row_spec(0, bold = TRUE, color = "white", background = "#333333")

# Attach ggplot2 (no effect if it is already attached)
library(ggplot2)

# Horizontal bar chart of individual purposes, ordered by frequency
ggplot(purpose_counts, aes(x = reorder(purpose, n), y = n)) +
  geom_col(fill = "skyblue", color = "black") +
  labs(
    title = "Breakdown of Agency Purposes",
    subtitle = "Each purpose counted individually across agencies",
    x = "Purpose",
    y = "Count"
  ) +
  theme_minimal() +
  coord_flip() +  # purposes on the vertical axis for readability
  theme(
    axis.title = element_text(size = 9),
    axis.text = element_text(size = 8),
    plot.subtitle = element_text(size = 10),
    plot.title = element_text(size = 12, face = "bold")
  )


# One row per (COW code, purpose) with its frequency, after splitting
# comma-separated purposes into separate rows
country_purpose_counts <- ais %>%
  mutate(purpose = strsplit(as.character(purpose), ",\\s*")) %>%
  unnest(purpose) %>%
  count(cow, purpose, sort = TRUE)

# Print the result for verification
print(country_purpose_counts)

# Stacked bars: one bar per COW code, segments coloured by purpose
ggplot(country_purpose_counts, aes(x = factor(cow), y = n, fill = purpose)) +
  geom_col() +
  labs(
    title = "Breakdown of Agency Purposes by Country",
    x = "Country (COW Code)",
    y = "Count of Purposes",
    fill = "Purpose"
  ) +
  theme_minimal() +
  theme(
    legend.position = "right",
    axis.title = element_text(size = 11),
    axis.text.x = element_text(angle = 45, hjust = 1, size = 8),
    plot.title = element_text(size = 14, face = "bold")
  )

# Separate multiple purposes and count each purpose per country
country_purpose_counts <- ais %>%
  mutate(purpose = strsplit(as.character(purpose), ",\\s*")) %>%
  unnest(purpose) %>%
  count(Country, purpose, sort = TRUE)

# Consolidate similar categories and correct spelling errors.
# Labels are trimmed first, so the recode keys are written WITHOUT trailing
# spaces; a key such as "strategic intelligence " could never match a trimmed
# value. Trimming alone already merges "counterintelligence " into
# "counterintelligence", so no separate rule is needed for it.
country_purpose_counts <- country_purpose_counts %>%
  mutate(purpose = str_trim(purpose)) %>%  # Remove leading/trailing spaces
  mutate(purpose = case_when(
    purpose == "strategic intelligence" ~ "external intel",
    purpose == "secret operations" ~ "espionage",

    # Add other replacements as necessary
    TRUE ~ purpose  # Keeps all other categories unchanged
  )) %>%
  # Trimming/recoding can leave several rows for one Country/purpose pair;
  # add their counts together so each pair appears exactly once.
  count(Country, purpose, wt = n, name = "n", sort = TRUE)


# Print the result for verification
table(country_purpose_counts$purpose)

# Stacked bars of purposes per country, legend placed to the right
ggplot(country_purpose_counts, aes(x = reorder(Country, -n), y = n, fill = purpose)) +
  geom_col() +
  labs(
    title = "Breakdown of Agency Purposes by Country",
    x = "Country",
    y = "Count of Purposes",
    fill = "Purpose"
  ) +
  theme_minimal() +
  theme(
    plot.title = element_text(size = 12, face = "bold", hjust = 0.5),  # centred title
    axis.title = element_text(size = 11),
    axis.text.x = element_text(angle = 45, hjust = 1, size = 8),
    legend.position = "right",
    legend.margin = margin(t = 20, r = 10, b = 10, l = 10),  # space around the legend
    legend.title = element_text(size = 9, hjust = 0.5),      # centred legend title
    legend.text = element_text(size = 7)
  ) +
  guides(fill = guide_legend(title.position = "top"))  # legend title above the keys


# Load necessary libraries
library(sf)
library(dplyr)
library(ggplot2)

# Calculate the centroids for each country and store their coordinates
africa_map_centroids <- africa_map %>%
  st_centroid() %>%
  mutate(
    lon = st_coordinates(.)[,1],
    lat = st_coordinates(.)[,2]
  )

# Prepare the data for plotting purposes with centroid coordinates.
# The map data has no "Country" column; its country names are in `name`, so
# that column is renamed for the join. Only the name and the two coordinates
# are carried over (geometry dropped) so the counts stay a plain table and no
# other map column (e.g. `cow`) is pulled into it.
# NOTE(review): the join only matches where the AFINT country spelling equals
# the map's `name` value; check unmatched countries (lon/lat = NA).
country_purpose_counts <- country_purpose_counts %>%
  left_join(
    africa_map_centroids %>%
      st_drop_geometry() %>%
      select(Country = name, lon, lat),
    by = "Country"
  )

# Plot the map with stacked bars for purposes
# Layers: grey country polygons, a stacked bar layer built from
# country_purpose_counts, and the country centroids.
# NOTE(review): the bar layer maps every row to x = 1, so all bars are stacked
# at a single x position rather than at each country's centroid; the lon/lat
# columns are not used here. Confirm this is the intended figure.
# NOTE(review): scale_fill_manual lists six purpose labels; any purpose value
# outside this list has no colour assigned.
ggplot() +
  geom_sf(data = africa_map, fill = "lightgrey", color = "white") +
  geom_bar(data = country_purpose_counts, aes(x = 1, y = n, fill = purpose),
           stat = "identity", width = 0.5, position = "stack") +
  geom_sf(data = africa_map_centroids, aes(geometry = geometry)) +
  coord_sf() +
  labs(title = "Breakdown of Intelligence Agency Purposes in Africa",
       subtitle = "Each stacked bar represents purpose breakdown per country",
       fill = "Purpose") +
  scale_fill_manual(values = c("domestic intel" = "lightblue", 
                               "external intel" = "darkblue",
                               "military intel" = "orange",
                               "counterintelligence" = "purple",
                               "espionage" = "green",
                               "other" = "grey")) +
  theme_minimal() +
  theme(
    plot.title = element_text(size = 14, face = "bold", hjust = 0.5),
    plot.subtitle = element_text(size = 12, hjust = 0.5),
    legend.position = "right"
  )

# Rebuild africa_map as an sf object from Natural Earth (medium scale).
# No shapefile is read from disk: the map comes from ne_countries() below, so
# a local .shp path is not required.

# Install and load rnaturalearth packages if not already installed
if (!require(rnaturalearth)) install.packages("rnaturalearth")
if (!require(rnaturalearthdata)) install.packages("rnaturalearthdata")

library(rnaturalearth)
library(sf)
library(dplyr)

# Load and filter the Africa map
africa_map <- ne_countries(scale = "medium", continent = "Africa", returnclass = "sf")
# Calculate centroids for African countries
africa_map_centroids <- africa_map %>%
  st_centroid() %>%
  mutate(
    lon = st_coordinates(.)[,1],  # Longitude
    lat = st_coordinates(.)[,2]   # Latitude
  )

# countrycode: install on first use, then attach
if (!require(countrycode)) install.packages("countrycode")
library(countrycode)

# Derive COW codes for the map from its country names
africa_map <- mutate(
  africa_map,
  cow = countrycode(name, origin = "country.name", destination = "cown")
)

# Bring the COW code into country_purpose_counts, matching on country name
country_purpose_counts <- left_join(
  country_purpose_counts,
  select(africa_map, Country = name, cow),
  by = "Country"
)

# Print the updated data frame to verify
print(head(country_purpose_counts))

# Manual codes for two country labels: 490 for "Congo (DRC)" and 560 for
# "South Africa " (the label is matched with its trailing space)
country_purpose_counts <- mutate(
  country_purpose_counts,
  cow = ifelse(Country == "Congo (DRC)", 490, cow),
  cow = ifelse(Country == "South Africa ", 560, cow)
)

# Verify the change
print(filter(country_purpose_counts, Country == "Congo (DRC)"))

# COW codes for statcap, derived from the country names in "Entity"
statcap <- mutate(
  statcap,
  cow = countrycode(Entity, origin = "country.name", destination = "cown")
)

# Verify the added COW codes
print(head(statcap))


# Merge country_purpose_counts with merged_sccow by the cow column
# NOTE(review): merged_sccow is sorted by cow-year above, so it presumably has
# several rows per cow; this join would then repeat each purpose row once per
# year. Confirm that this fan-out is intended.
merged_data_H34 <- country_purpose_counts %>%
  left_join(merged_sccow, by = "cow")

# Check the merged data to ensure it was successful
print(head(merged_data_H34))

# This overwrites the data_H34 object built earlier in the script.
data_H34 <- merged_data_H34 %>%
  rename(terr = Territory.under.state.control..best.estimate..aggregate..average.)


# Split the involvement column into separate rows and count each category
# NOTE(review): neither input of the join above is shown to carry an
# `involvement` column in this script; verify that data_H34 has one at this
# point, otherwise this step fails.
involvement_counts <- data_H34 %>%
  mutate(involvement = strsplit(as.character(involvement), ",\\s*")) %>%  # Split by comma and optional space
  unnest(involvement) %>%  # Expand each involvement type into its own row
  count(involvement, sort = TRUE)  # Count each involvement category

# View the breakdown of individual involvement types
print(involvement_counts)

# Load ggplot2 if not already loaded
library(ggplot2)

# Create a bar plot of involvement categories
ggplot(involvement_counts, aes(x = reorder(involvement, -n), y = n)) +
  geom_bar(stat = "identity", fill = "skyblue", color = "black") +
  labs(title = "Frequency of Involvement Categories",
       x = "Involvement Category",
       y = "Count") +
  theme_minimal() +
  theme(
    axis.text.x = element_text(angle = 45, hjust = 1, size = 8), # Rotate x-axis labels for readability
    plot.title = element_text(size = 14, face = "bold"),
    axis.title = element_text(size = 11)
  ) +
  coord_flip()  # Flip coordinates for better readability if there are many categories

# Separate multiple involvement and count each involvement per country
country_involvement_counts <- ais %>%
  mutate(involvement = strsplit(as.character(involvement), ",\\s*")) %>%
  unnest(involvement) %>%
  count(Country, involvement, sort = TRUE)

# Print the result for verification
print(country_involvement_counts)

# Load dplyr if not already loaded
library(dplyr)

# Consolidate similar categories and correct spelling errors.
# case_when() tests every rule against the ORIGINAL value and applies only the
# first match; rules do not chain. Each raw label is therefore mapped straight
# to its final category (e.g. both "media" and "censor" go to "censoring"),
# so no intermediate label survives as a separate category.
country_involvement_counts <- country_involvement_counts %>%
  mutate(involvement = str_trim(involvement)) %>%  # Remove leading/trailing spaces
  mutate(involvement = case_when(
    involvement %in% c("forced disappearances", "summary executions") ~ "extrajudicial killings",
    involvement == "counteterrrorism" ~ "counterterrorism",
    involvement %in% c("civil unrest", "civil war negotiations") ~ "civil war",
    involvement %in% c("media", "censor") ~ "censoring",
    involvement %in% c("coup-proofing", "coups") ~ "coups & coup-proofing",
    involvement %in% c("abduction", "political. imprisonment", "political  imprisonment", "political imprisonment") ~ "repression",
    involvement %in% c("border security", "covert action", "fight against organized crime and transnational crimes", "state security", "immigration control", "interstate conflict") ~ "foreign policy",
    involvement %in% c("purge", "economy", "political police", "partisan") ~ "domestic politics",

    # Add other replacements as necessary
    TRUE ~ involvement  # Keeps all other categories unchanged
  )) %>%
  # Several raw labels now share one category; add their counts together so
  # each Country/involvement pair appears exactly once.
  count(Country, involvement, wt = n, name = "n", sort = TRUE)

# Check the consolidated categories
print(table(country_involvement_counts$involvement))


# Stacked bars of involvement categories per country, legend to the right
ggplot(country_involvement_counts, aes(x = reorder(Country, -n), y = n, fill = involvement)) +
  geom_col() +
  labs(
    title = "Breakdown of Agency Involvement by Country",
    x = "Country",
    y = "Count of Involvement",
    fill = "Involvement"
  ) +
  theme_minimal() +
  theme(
    plot.title = element_text(size = 12, face = "bold", hjust = 0.5),  # centred title
    axis.title = element_text(size = 11),
    axis.text.x = element_text(angle = 45, hjust = 1, size = 8),
    legend.position = "right",
    legend.margin = margin(t = 20, r = 2, b = 10, l = 10),  # space around the legend
    legend.title = element_text(size = 9, hjust = 0.5),     # centred legend title
    legend.text = element_text(size = 7)
  ) +
  guides(fill = guide_legend(title.position = "top"))  # legend title above the keys

##END